{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Regression Analysis to Improve Performance"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Essential Libraries"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import matplotlib.pyplot as plt\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "import os\n",
    "import cv2\n",
    "\n",
    "from sklearn.linear_model import LinearRegression"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## ECG Metadata"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "sampleFolders = \"../ecgs/set-a\"\n",
    "\n",
    "# Column names applied to the headerless ECG text files: the first column\n",
    "# becomes the row index when the files are read, the other twelve are the leads.\n",
    "ecgHeaders = [\n",
    "\t\"sample\",\n",
    "\t\"LI\",\n",
    "\t\"LII\",\n",
    "\t\"LIII\",\n",
    "\t\"aVR\",\n",
    "\t\"aVF\",\n",
    "\t\"aVL\",\n",
    "\t\"V1\",\n",
    "\t\"V2\",\n",
    "\t\"V3\",\n",
    "\t\"V4\",\n",
    "\t\"V5\",\n",
    "\t\"V6\"\n",
    "]\n",
    "\n",
    "# One record id per line. Blank lines are dropped so they can never turn into\n",
    "# an empty file name when the ids are interpolated into paths.\n",
    "with open(\"../ecgs/labels/RECORDS-acceptable\") as recordsFile:\n",
    "\tstrippedLines = (line.strip() for line in recordsFile)\n",
    "\tacceptableEcgsFiles = [recordId for recordId in strippedLines if recordId]\n",
    "\n",
    "print(acceptableEcgsFiles[:5])\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Correlation Matrix images"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Make sure the output folder exists; savefig does not create it.\n",
    "os.makedirs(\"./correlations\", exist_ok = True)\n",
    "\n",
    "for ecgId in acceptableEcgsFiles:\n",
    "\tecgDf = pd.read_csv(\n",
    "\t\tfilepath_or_buffer = f\"{sampleFolders}/{ecgId}.txt\",\n",
    "\t\theader = None,\n",
    "\t\tindex_col = 0,\n",
    "\t\tnames = ecgHeaders\n",
    "\t)\n",
    "\n",
    "\t# Pairwise Pearson correlation between the lead columns.\n",
    "\tcorrelations = ecgDf.corr(method = \"pearson\")\n",
    "\n",
    "\t# Derive the tick positions from the matrix instead of hard-coding 12.\n",
    "\tleadTicks = np.arange(len(correlations.columns))\n",
    "\n",
    "\tfigure, axes = plt.subplots()\n",
    "\n",
    "\timage = axes.imshow(\n",
    "\t\tcorrelations,\n",
    "\t\taspect = \"auto\"\n",
    "\t)\n",
    "\n",
    "\taxes.set_title(\"Correlação entre as derivações\")\n",
    "\n",
    "\taxes.set_xticks(leadTicks, labels = correlations.columns)\n",
    "\taxes.set_yticks(leadTicks, labels = correlations.columns)\n",
    "\n",
    "\t# Annotate every cell with its rounded value. imshow draws row i at y = i\n",
    "\t# and column j at x = j, so the text position is (x = j, y = i).\n",
    "\tfor i in range(correlations.shape[0]):\n",
    "\t\tfor j in range(correlations.shape[1]):\n",
    "\t\t\tcorrelation = np.round(correlations.iloc[i, j], 1)\n",
    "\n",
    "\t\t\taxes.text(\n",
    "\t\t\t\tj, i,\n",
    "\t\t\t\tcorrelation,\n",
    "\t\t\t\tha = \"center\",\n",
    "\t\t\t\tva = \"center\",\n",
    "\t\t\t\t# Values that round to zero are highlighted in red.\n",
    "\t\t\t\tcolor = \"red\" if correlation == 0 else \"w\"\n",
    "\t\t\t)\n",
    "\n",
    "\tfigure.colorbar(image, ax = axes)\n",
    "\tfigure.savefig(f\"./correlations/{ecgId}.png\")\n",
    "\n",
    "\t# Close this specific figure so figures do not pile up across iterations.\n",
    "\tplt.close(figure)\n",
    "\n",
    "\tdel ecgDf\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Correlation Matrix Video"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "correlationsFolder = \"./correlations\"\n",
    "\n",
    "# os.listdir returns names in arbitrary order, so sort them to get a\n",
    "# reproducible frame sequence, and keep only the PNG frames.\n",
    "correlationsImages = sorted(\n",
    "    name for name in os.listdir(correlationsFolder)\n",
    "    if name.lower().endswith(\".png\")\n",
    ")\n",
    "\n",
    "if not correlationsImages:\n",
    "    raise FileNotFoundError(f\"No PNG frames found in {correlationsFolder}\")\n",
    "\n",
    "# The first frame defines the video dimensions.\n",
    "frame = cv2.imread(f\"{correlationsFolder}/{correlationsImages[0]}\")\n",
    "if frame is None:\n",
    "    raise ValueError(f\"Could not read frame {correlationsImages[0]}\")\n",
    "height, width, _ = frame.shape\n",
    "\n",
    "video = cv2.VideoWriter(\n",
    "    filename = \"correlations.avi\",\n",
    "    fourcc = cv2.VideoWriter_fourcc(*\"XVID\"),\n",
    "    fps = 1,\n",
    "    frameSize = (width, height)\n",
    ")\n",
    "\n",
    "try:\n",
    "    for image in correlationsImages:\n",
    "        frame = cv2.imread(f\"{correlationsFolder}/{image}\")\n",
    "        # cv2.imread returns None for files it cannot decode; skip those\n",
    "        # instead of handing None to the writer.\n",
    "        if frame is not None:\n",
    "            video.write(frame)\n",
    "finally:\n",
    "    # Release the writer even if a frame fails, so the file is finalized.\n",
    "    video.release()\n",
    "\n",
    "# ffmpeg -i correlations.avi -c:v libx264 -crf 23 correlations.mp4"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## RegTables Images"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# The index column and the three leads used as regression inputs are\n",
    "# excluded; what remains are the leads to be reconstructed. A list keeps the\n",
    "# order of ecgHeaders, whereas a set of strings iterates in an order that can\n",
    "# change between kernel sessions.\n",
    "excludedHeaders = {\"LI\", \"LII\", \"V2\", \"sample\"}\n",
    "recHeaders = [header for header in ecgHeaders if header not in excludedHeaders]\n",
    "print(recHeaders)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "regression = LinearRegression()\n",
    "predictorLeads = [\"LI\", \"LII\", \"V2\"]\n",
    "\n",
    "# Make sure the output folder exists; savefig does not create it.\n",
    "os.makedirs(\"./regtables\", exist_ok = True)\n",
    "\n",
    "for ecgId in acceptableEcgsFiles:\n",
    "\tecgDf = pd.read_csv(\n",
    "\t\tfilepath_or_buffer = f\"{sampleFolders}/{ecgId}.txt\",\n",
    "\t\theader = None,\n",
    "\t\tindex_col = 0,\n",
    "\t\tnames = ecgHeaders\n",
    "\t)\n",
    "\n",
    "\t# The training window is the same for every target lead, so slice it once.\n",
    "\ttrainX = ecgDf[predictorLeads][100: 500]\n",
    "\n",
    "\t# One row per target lead: lead name, the three coefficients, intercept.\n",
    "\tregressionRows = []\n",
    "\n",
    "\tfor recLead in recHeaders:\n",
    "\t\tregression.fit(\n",
    "\t\t\tX = trainX,\n",
    "\t\t\ty = ecgDf[recLead][100: 500]\n",
    "\t\t)\n",
    "\n",
    "\t\tregressionRows.append([\n",
    "\t\t\trecLead,\n",
    "\t\t\tregression.coef_[0],\n",
    "\t\t\tregression.coef_[1],\n",
    "\t\t\tregression.coef_[2],\n",
    "\t\t\tregression.intercept_\n",
    "\t\t])\n",
    "\n",
    "\t# Build the frame in one go instead of appending to it row by row.\n",
    "\tregressionDf = pd.DataFrame(\n",
    "\t\tregressionRows,\n",
    "\t\tcolumns = [\"lead\", \"LI\", \"LII\", \"V2\", \"Intercept\"],\n",
    "\t).set_index(\"lead\")\n",
    "\n",
    "\tfigure, axes = plt.subplots()\n",
    "\n",
    "\t# Only the table is drawn, so hide the axes frame and ticks.\n",
    "\taxes.axis(\"off\")\n",
    "\n",
    "\tfigure.suptitle(\"Tabelas das regressões usadas para cada derivação\")\n",
    "\n",
    "\tfigure.subplots_adjust(top = .5)\n",
    "\n",
    "\taxes.table(\n",
    "\t\tcellText = np.round(regressionDf.values, 5),\n",
    "\t\tcolLabels = regressionDf.columns,\n",
    "\t\trowLabels = regressionDf.index,\n",
    "\t\tloc = \"center\",\n",
    "\t)\n",
    "\n",
    "\tfigure.tight_layout(pad = 1.5)\n",
    "\n",
    "\tfigure.savefig(f\"./regtables/{ecgId}.png\")\n",
    "\n",
    "\t# Close this specific figure so figures do not pile up across iterations.\n",
    "\tplt.close(figure)\n",
    "\n",
    "\tdel regressionDf\n",
    "\tdel ecgDf"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## RegTables Video"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "regTablesFolder = \"./regtables\"\n",
    "\n",
    "# os.listdir returns names in arbitrary order, so sort them to get a\n",
    "# reproducible frame sequence, and keep only the PNG frames.\n",
    "regTableImages = sorted(\n",
    "    name for name in os.listdir(regTablesFolder)\n",
    "    if name.lower().endswith(\".png\")\n",
    ")\n",
    "\n",
    "if not regTableImages:\n",
    "    raise FileNotFoundError(f\"No PNG frames found in {regTablesFolder}\")\n",
    "\n",
    "# The first frame defines the video dimensions.\n",
    "frame = cv2.imread(f\"{regTablesFolder}/{regTableImages[0]}\")\n",
    "if frame is None:\n",
    "    raise ValueError(f\"Could not read frame {regTableImages[0]}\")\n",
    "height, width, _ = frame.shape\n",
    "\n",
    "video = cv2.VideoWriter(\n",
    "    filename = \"regtables.avi\",\n",
    "    fourcc = cv2.VideoWriter_fourcc(*\"XVID\"),\n",
    "    fps = 1,\n",
    "    frameSize = (width, height)\n",
    ")\n",
    "\n",
    "try:\n",
    "    for image in regTableImages:\n",
    "        frame = cv2.imread(f\"{regTablesFolder}/{image}\")\n",
    "        # cv2.imread returns None for files it cannot decode; skip those\n",
    "        # instead of handing None to the writer.\n",
    "        if frame is not None:\n",
    "            video.write(frame)\n",
    "finally:\n",
    "    # Release the writer even if a frame fails, so the file is finalized.\n",
    "    video.release()\n",
    "\n",
    "# ffmpeg -i regtables.avi -c:v libx264 -crf 23 regtables.mp4"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## ECG regression coefficients dataframe"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "regression = LinearRegression()\n",
    "predictorLeads = [\"LI\", \"LII\", \"V2\"]\n",
    "\n",
    "# Rows are collected in a plain list and turned into a DataFrame once at the\n",
    "# end; appending to a DataFrame row by row copies it on every insertion.\n",
    "regressionRows = []\n",
    "\n",
    "for ecgId in acceptableEcgsFiles:\n",
    "\tecgDf = pd.read_csv(\n",
    "\t\tfilepath_or_buffer = f\"{sampleFolders}/{ecgId}.txt\",\n",
    "\t\theader = None,\n",
    "\t\tindex_col = 0,\n",
    "\t\tnames = ecgHeaders\n",
    "\t)\n",
    "\n",
    "\t# The training window is the same for every target lead, so slice it once.\n",
    "\ttrainX = ecgDf[predictorLeads][100: 500]\n",
    "\n",
    "\tfor recLead in recHeaders:\n",
    "\t\tregression.fit(\n",
    "\t\t\tX = trainX,\n",
    "\t\t\ty = ecgDf[recLead][100: 500]\n",
    "\t\t)\n",
    "\n",
    "\t\t# One row per (ECG, target lead): ids, three coefficients, intercept.\n",
    "\t\tregressionRows.append([\n",
    "\t\t\tecgId,\n",
    "\t\t\trecLead,\n",
    "\t\t\tregression.coef_[0],\n",
    "\t\t\tregression.coef_[1],\n",
    "\t\t\tregression.coef_[2],\n",
    "\t\t\tregression.intercept_\n",
    "\t\t])\n",
    "\n",
    "\tdel ecgDf\n",
    "\n",
    "regressionDf = pd.DataFrame(\n",
    "\tregressionRows,\n",
    "\tcolumns = [\"ecgId\", \"lead\", \"LI\", \"LII\", \"V2\", \"Intercept\"],\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "print(regressionDf.describe().round(3))"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": ".venv",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.13.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
